From b95f8d343c617ac3371dc83f3778a4fdf544ace9 Mon Sep 17 00:00:00 2001 From: "Karl O. Pinc kop@karlpinc.com" Date: Fri, 24 Oct 2025 05:48:35 +0000 Subject: [PATCH] Update nesting state in clean during conversion; Problems #26 and #27 --- conversion/clean.sql | 155 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/conversion/clean.sql b/conversion/clean.sql index 075f6d7..d4bc793 100644 --- a/conversion/clean.sql +++ b/conversion/clean.sql @@ -269,3 +269,158 @@ UPDATE follow_arrival UPDATE follow_arrival SET fa_data_source = 'Tiki' WHERE fa_data_source = 'TikI'; + +-- Problems #26 and #27 +-- There can be a mis-match between the first follow_arrival row for the +-- focal and the follow row, regarding whether the focal started the +-- follow in a nest. If either says the focal was in a nest then update +-- the follow_arrival so that the focal was in the nest. (The follow_arrival +-- value is then used in the conversion, not the follow value.) +-- +-- Note that we know that follow.fol_flag_begin_in_nest and +-- follow.fol_flag_end_in_nest are always 0 or 1, because, even though +-- the program that dumps the MS Access data produces an INT, they +-- are a boolean in MS Access. +-- We also check, in load_follow_arrival_sanity.sql, that +-- follow_arrival.fa_type_of_nesting is 0, 1, 2, or 3, and nothing else. +WITH spans AS + (SELECT follow_arrival.fa_fol_date + , follow_arrival.fa_fol_b_focal_animid + , follow_arrival.fa_b_arr_animid + , MIN(follow_arrival.fa_time_start) AS min_start + , MAX(follow_arrival.fa_time_end) AS max_end + FROM follow_arrival + WHERE follow_arrival.fa_b_arr_animid + = follow_arrival.fa_fol_b_focal_animid + GROUP BY follow_arrival.fa_fol_date + , follow_arrival.fa_fol_b_focal_animid + , follow_arrival.fa_b_arr_animid) + -- Use a separate query to find min/max seq numbers + -- The first (last) seq number may not belong to the + -- minimal start (maximal stop) time, _and_, the + -- follow arrivals are not unique per date, focal, + -- arriving animal. The seq number distinguishes + -- date, focal, arriving animal. +, min_seqs AS + (SELECT follow_arrival.fa_fol_date + , follow_arrival.fa_fol_b_focal_animid + , follow_arrival.fa_b_arr_animid + , MIN(follow_arrival.fa_seq_num) AS min_seq + FROM follow_arrival + JOIN spans ON ( + follow_arrival.fa_fol_date = spans.fa_fol_date + AND follow_arrival.fa_fol_b_focal_animid = spans.fa_fol_b_focal_animid + AND follow_arrival.fa_b_arr_animid = spans.fa_fol_b_focal_animid + AND follow_arrival.fa_time_start = spans.min_start) + GROUP BY follow_arrival.fa_fol_date + , follow_arrival.fa_fol_b_focal_animid + , follow_arrival.fa_b_arr_animid) +, max_seqs AS + (SELECT follow_arrival.fa_fol_date + , follow_arrival.fa_fol_b_focal_animid + , follow_arrival.fa_b_arr_animid + , MAX(follow_arrival.fa_seq_num) AS max_seq + FROM follow_arrival + JOIN spans ON ( + follow_arrival.fa_fol_date = spans.fa_fol_date + AND follow_arrival.fa_fol_b_focal_animid = spans.fa_fol_b_focal_animid + AND follow_arrival.fa_b_arr_animid = spans.fa_fol_b_focal_animid + AND follow_arrival.fa_time_end = spans.max_end) + GROUP BY follow_arrival.fa_fol_date + , follow_arrival.fa_fol_b_focal_animid + , follow_arrival.fa_b_arr_animid) +UPDATE follow_arrival + SET fa_type_of_nesting = + (SELECT + CASE + WHEN min_nums.min_seq = max_nums.max_seq THEN + -- Only one follow_arrival for the focal in the whole follow + CASE + WHEN (follow.fol_flag_begin_in_nest = 1 + AND follow.fol_flag_end_in_nest = 1) + OR (follow.fol_flag_begin_in_nest = 1 + AND -- in @ end, only + follow_arrival.fa_type_of_nesting = 2) + OR (follow.fol_flag_end_in_nest = 1 + AND -- in @ start, only + follow_arrival.fa_type_of_nesting = 1) + THEN 3 -- in @ both + WHEN follow.fol_flag_begin_in_nest = 1 + -- follow.fol_flag_end_in_nest = 0, because WHEN, above + -- Our WHERE clause ensures that + -- follow_arrival.fa_type_of_nesting is not 1, + -- in @ start only, and not 3, in @ both, + -- and our 1st WHEN clause above ensures + -- not 2, in at end, so that leaves 0, not in, + -- only. + THEN 1 -- in @ start, only + ELSE + -- Following the logic of the 2nd WHEN, above, + -- follow.fol_flag_end_in_nest = 1 + -- and follow_arrival.fa_type_of_nesting = 0 + 2 -- in @ end, only + END + WHEN follow_arrival.fa_seq_num = min_nums.min_seq THEN + -- The first of multiple follow_arrivals of the focal in the + -- follow. We are interested in updating only the + -- start-in-nest state. We know it's wrong, erring on the + -- side of the follow arrival not showing start-in-nest, + -- because of the WHERE clause. So + -- follow.start_in_nest = 1, AND follow_arrival.fa_type_of_nesting + -- is 0, not in nest, or 2, in @ end, only. + CASE + WHEN follow_arrival.fa_type_of_nesting = 0 -- not in nest + THEN 1 -- in @ start, only + ELSE -- follow_arrival.fa_type_of_nesting = 2, in @ end, only + 3 -- in @ both + END + ELSE -- follow_arrival.fa_seq_num = max_nums.max_seq, + -- because of the WHEN clauses, above. + -- The last of multiple follow_arrivals of the focal in the + -- follow. We are interested only in updating the + -- end-in-nest state. We know it's wrong, erring on the + -- side of the follow arrival not showing end-in-nest, + -- because of the WHERE clause. So + -- follow.end_in_nest = 1, AND follow_arrival.fa_type_of_nesting + -- is 0, not in nest, or 1, in @ start, only. + CASE + WHEN follow_arrival.fa_type_of_nesting = 0 -- not in nest + THEN 2 -- in @ end, only + ELSE -- follow_arrival.fa_type_of_nesting = 1, in @ start, only + 3 -- in @ both + END + END) + FROM spans + JOIN follow + ON (follow.fol_date = spans.fa_fol_date + AND follow.fol_b_animid = spans.fa_fol_b_focal_animid) + , LATERAL ( -- Stick a min_nums.min_seq column on the end of each row + SELECT min_seqs.min_seq + FROM min_seqs + WHERE min_seqs.fa_fol_date = spans.fa_fol_date + AND min_seqs.fa_fol_b_focal_animid = spans.fa_fol_b_focal_animid + AND min_seqs.fa_b_arr_animid = spans.fa_b_arr_animid + ) AS min_nums + , LATERAL ( -- Stick a max_nums.max_seq column on the end of each row + SELECT max_seqs.max_seq + FROM max_seqs + WHERE max_seqs.fa_fol_date = spans.fa_fol_date + AND max_seqs.fa_fol_b_focal_animid = spans.fa_fol_b_focal_animid + AND max_seqs.fa_b_arr_animid = spans.fa_b_arr_animid + ) AS max_nums + WHERE follow_arrival.fa_fol_date = spans.fa_fol_date + AND follow_arrival.fa_fol_b_focal_animid = spans.fa_fol_b_focal_animid + AND follow_arrival.fa_b_arr_animid = spans.fa_fol_b_focal_animid + -- Update only those follow arrivals with a min or max sequence + -- number, and only update the follow_arrivals where the follow + -- says there is nesting even though the follow_arrival + -- does not. We don't care about vice-versa; + -- the follow_arrival value is what is converted. + AND ((follow_arrival.fa_seq_num = min_nums.min_seq + AND (follow_arrival.fa_type_of_nesting <> 1 -- in @ start, only + AND follow_arrival.fa_type_of_nesting <> 3 -- in @ both + AND follow.fol_flag_begin_in_nest = 1)) + OR (follow_arrival.fa_seq_num = max_nums.max_seq + AND (follow_arrival.fa_type_of_nesting <> 2 -- in @ end, only + AND follow_arrival.fa_type_of_nesting <> 3 -- in @ both + AND follow.fol_flag_end_in_nest = 1))); -- 2.34.1